
# Load packages  
  library(tidyverse)
  library(haven)
  library(broom)
  library(lmtest)
  library(sandwich)
  library(stargazer)

# Calculate robust confidence intervals
  se_robust <- function(x)
    coeftest(x, vcov = vcovHC(x, type = "HC0"))[, "Std. Error"]
  p_robust <- function(x)
    coeftest(x, vcov = vcovHC(x, type = "HC0"))[, "Pr(>|t|)"]
  

  #### GLES 2021 pre-release wave 17
  
  # Load previous panel waves to retrieve Age, gender and education for wave 16 and wave 17 later on
  panel <- read_dta("ObsStudy_00_data_GLES_panel_2016_2021_waves_2017.dta")
  panel <- panel[, c(21, 25, 28, 79)]
  
  # Gender (1 male, 2 female)
  panel$kpx_2280 <- ifelse(panel$kpx_2280 == 2, 0, panel$kpx_2280)
  
  # Education
  panel$kp1_2320 <- ifelse(panel$kp1_2320 < 0, NA, panel$kp1_2320)
  
  #### wave 17
  # Load data
  data <- read_dta("ObsStudy_00_data_Germany_GLES_2021.dta")
  
  # Match with panel data by "lfdn" variable to have access to control variables
  dt <- merge(data, panel, by = c("lfdn"))   
  
  # Party rating
  colnames(dt)[seq(116, 122, 1)] <- c("rat_CDU", "rat_CSU", "rat_SPD",  "rat_AfD", "rat_FDP", "rat_LINKE", "rat_GRUENE")
  
  dt[, 116:122][dt[, 116:122] < 1] <- NA
  
  dt$rat_CDU <- as.numeric(dt$rat_CDU)
  dt$rat_CSU <- as.numeric(dt$rat_CSU)
  dt$rat_SPD <- as.numeric(dt$rat_SPD)
  dt$rat_FDP <- as.numeric(dt$rat_FDP)
  dt$rat_GRUENE <- as.numeric(dt$rat_GRUENE)
  dt$rat_LINKE <- as.numeric(dt$rat_LINKE)
  dt$rat_AfD <- as.numeric(dt$rat_AfD)
  
  
  # Coalition evaluations/preferred coalition
  colnames(dt)[seq(101, 105, 1)] <- c("rat_coal_CDU_CSU_SPD", "rat_coal_CDU_CSU_GRUENE", "rat_coal_GRUENE_SPD_FDP", "rat_coal_CDU_CSU_GRUENE_FDP", "rat_coal_GRUENE_SPD_LINKE")
  
  dt[, 101:105][dt[, 101:105] < 1] <- NA
  
  dt$rat_coal_CDU_CSU_SPD <- as.numeric(dt$rat_coal_CDU_CSU_SPD)
  dt$rat_coal_CDU_CSU_GRUENE <- as.numeric(dt$rat_coal_CDU_CSU_GRUENE)
  dt$rat_coal_GRUENE_SPD_FDP <- as.numeric(dt$rat_coal_GRUENE_SPD_FDP)
  dt$rat_coal_CDU_CSU_GRUENE_FDP <- as.numeric(dt$rat_coal_CDU_CSU_GRUENE_FDP)
  dt$rat_coal_GRUENE_SPD_LINKE <- as.numeric(dt$rat_coal_GRUENE_SPD_LINKE)
  
  
  # Coalition likelihood
  colnames(dt)[seq(106, 110, 1)] <- c("coallik_CDU_CSU_SPD", "coallik_CDU_CSU_GRUENE", "coallik_GRUENE_SPD_FDP", "coallik_CDU_CSU_GRUENE_FDP","coallik_GRUENE_SPD_LINKE")
  
  dt[, 106:110][dt[, 106:110] < 1] <- NA
  
  dt$coallik_CDU_CSU_SPD <- as.numeric(dt$coallik_CDU_CSU_SPD)
  dt$coallik_CDU_CSU_GRUENE <- as.numeric(dt$coallik_CDU_CSU_GRUENE)
  dt$coallik_GRUENE_SPD_FDP <- as.numeric(dt$coallik_GRUENE_SPD_FDP)
  dt$coallik_CDU_CSU_GRUENE_FDP <- as.numeric(dt$coallik_CDU_CSU_GRUENE_FDP)
  dt$coallik_GRUENE_SPD_LINKE <- as.numeric(dt$coallik_GRUENE_SPD_LINKE)
  
  # Dependent variable,  Beabsichtigte Stimmabgabe Zweitstimme
  dt[, 14][dt[, 14] < 1] <- NA
  dt$vote <- as.numeric(dt$kp17_190ba)
  
  # Assign interpretable labels to vote variable
  dt$vote[dt$vote==1] <- "UNION"
  dt$vote[dt$vote==4] <- "SPD"
  dt$vote[dt$vote==5] <- "FDP"
  dt$vote[dt$vote==6] <- "GRUENE"
  dt$vote[dt$vote==7] <- "LINKE"
  dt$vote[dt$vote==322] <- "AfD"
  dt$vote[dt$vote==801] <- "other party"
  
  
  # Generate ptv columns based on vote variable
  dt$ptv_UNION <- ifelse(dt$vote == "UNION", 1, 0)
  dt$ptv_SPD <- ifelse(dt$vote == "SPD", 1, 0)
  dt$ptv_FDP <- ifelse(dt$vote == "FDP", 1, 0)
  dt$ptv_GRUENE <- ifelse(dt$vote == "GRUENE", 1, 0)
  dt$ptv_LINKE <- ifelse(dt$vote == "LINKE", 1, 0)
  
  
  # Coalition evaluation
  Z <- dt %>% select(contains("rat_coal")) %>%
    mutate_all(as.numeric) %>%
    mutate_all(funs(scales::rescale(.,to = c(0, 1)))) 
  
  rat_coal_UNION <- Z %>% select(rat_coal_CDU_CSU_SPD, rat_coal_CDU_CSU_GRUENE, rat_coal_CDU_CSU_GRUENE_FDP) %>% as.matrix()
  rat_coal_SPD <- Z %>% select(rat_coal_CDU_CSU_SPD, rat_coal_GRUENE_SPD_FDP, rat_coal_GRUENE_SPD_LINKE) %>% as.matrix()
  rat_coal_FDP <- Z %>% select(rat_coal_GRUENE_SPD_FDP, rat_coal_CDU_CSU_GRUENE_FDP) %>% as.matrix()
  rat_coal_GRUENE <- Z %>% select(rat_coal_CDU_CSU_GRUENE, rat_coal_GRUENE_SPD_FDP, rat_coal_CDU_CSU_GRUENE_FDP, rat_coal_GRUENE_SPD_LINKE) %>% as.matrix()
  
  # Coalition likelihood
  gamma <- dt %>% select(contains("coallik_"))
  
  coal_lik_UNION <- gamma %>% select(coallik_CDU_CSU_SPD,  coallik_CDU_CSU_GRUENE, coallik_CDU_CSU_GRUENE_FDP) %>%
    mutate(sum_exp = exp(coallik_CDU_CSU_SPD) + exp(coallik_CDU_CSU_GRUENE) + exp(coallik_CDU_CSU_GRUENE_FDP),
           coallik_CDU_CSU_SPD = exp(coallik_CDU_CSU_SPD)/sum_exp,
           coallik_CDU_CSU_GRUENE = exp(coallik_CDU_CSU_GRUENE)/sum_exp,
           coallik_CDU_CSU_GRUENE_FDP = exp(coallik_CDU_CSU_GRUENE_FDP)/sum_exp) %>%
    select(-sum_exp) %>%
    as.matrix()
  
  
  coal_lik_SPD <- gamma %>% select(coallik_CDU_CSU_SPD, coallik_GRUENE_SPD_FDP , coallik_GRUENE_SPD_LINKE) %>%
    mutate(sum_exp = exp(coallik_CDU_CSU_SPD)  + exp(coallik_GRUENE_SPD_FDP) + exp(coallik_GRUENE_SPD_LINKE),
           coallik_CDU_CSU_SPD = exp(coallik_CDU_CSU_SPD)/sum_exp,
           coallik_GRUENE_SPD_FDP = exp(coallik_GRUENE_SPD_FDP)/sum_exp,
           coallik_GRUENE_SPD_LINKE = exp(coallik_GRUENE_SPD_LINKE)/sum_exp) %>%
    select(-sum_exp) %>%
    as.matrix()
  
  
  coal_lik_FDP <- gamma %>% select(coallik_GRUENE_SPD_FDP , coallik_CDU_CSU_GRUENE_FDP) %>%
    mutate(sum_exp =  exp(coallik_GRUENE_SPD_FDP) + exp(coallik_CDU_CSU_GRUENE_FDP),
           coallik_GRUENE_SPD_FDP = exp(coallik_GRUENE_SPD_FDP)/sum_exp,
           coallik_CDU_CSU_GRUENE_FDP = exp(coallik_CDU_CSU_GRUENE_FDP)/sum_exp) %>%
    select(-sum_exp) %>%
    as.matrix()
  
  
  coal_lik_GRUENE <- gamma %>% select(coallik_CDU_CSU_GRUENE, coallik_GRUENE_SPD_FDP, coallik_CDU_CSU_GRUENE_FDP, coallik_GRUENE_SPD_LINKE) %>%
    mutate(sum_exp = exp(coallik_CDU_CSU_GRUENE) + exp(coallik_GRUENE_SPD_FDP) + exp(coallik_CDU_CSU_GRUENE_FDP) + exp(coallik_GRUENE_SPD_LINKE),
           coallik_CDU_CSU_GRUENE = exp(coallik_CDU_CSU_GRUENE)/sum_exp,
           coallik_GRUENE_SPD_FDP = exp(coallik_GRUENE_SPD_FDP)/sum_exp,
           coallik_CDU_CSU_GRUENE_FDP = exp(coallik_CDU_CSU_GRUENE_FDP)/sum_exp,
           coallik_GRUENE_SPD_LINKE = exp(coallik_GRUENE_SPD_LINKE)/sum_exp) %>%
    select(-sum_exp) %>%
    as.matrix()
  
  
  # Mean-variance model
  EV <- function(gamma,Z){
    gamma %*% Z
  }
  
  Vari <- function(gamma,Z){
    gamma %*% ((Z - as.numeric(EV(gamma,Z)))^2)
  }
  
  
  N <- nrow(coal_lik_UNION)
  M_UNION <- sapply(1:N, function(i) EV(coal_lik_UNION[i,], rat_coal_UNION[i,]))
  V_UNION <- sapply(1:N, function(i) Vari(coal_lik_UNION[i,], rat_coal_UNION[i,]))
  
  N <- nrow(coal_lik_SPD)
  M_SPD <- sapply(1:N, function(i) EV(coal_lik_SPD[i,], rat_coal_SPD[i,]))
  V_SPD <- sapply(1:N, function(i) Vari(coal_lik_SPD[i,], rat_coal_SPD[i,]))
  
  N <- nrow(coal_lik_FDP)
  M_FDP <- sapply(1:N, function(i) EV(coal_lik_FDP[i,], rat_coal_FDP[i,]))
  V_FDP <- sapply(1:N, function(i) Vari(coal_lik_FDP[i,], rat_coal_FDP[i,]))
  
  N <- nrow(coal_lik_GRUENE)
  M_GRUENE <- sapply(1:N, function(i) EV(coal_lik_GRUENE[i,], rat_coal_GRUENE[i,]))
  V_GRUENE <- sapply(1:N, function(i) Vari(coal_lik_GRUENE[i,], rat_coal_GRUENE[i,]))
  
  
  # Sex: 1 (Male) und 2 (Female), generate a gender dummy "male"
  dt$male <- dt$kpx_2280

  # Age 
  dt$kpx_2290s <- ifelse(dt$kpx_2290s == "1955 und frueher", "1955", dt$kpx_2290s)
  dt$kpx_2290s <- as.numeric(dt$kpx_2290s)
  dt$age <- 2021 - dt$kpx_2290s

  # Education
  # omit "still at school" (9)
  dt$edu <- ifelse(dt$kp1_2320 == 9, NA, dt$kp1_2320)
  
  
  dt$kp17_2090a[dt$kp17_2090a<0] <- NA
  
  # PID
  if (!exists("robustnesscheck_pid")) {
    robustnesscheck_pid <- FALSE # PID as robustness?
  }
  dt$pid_UNION <- ifelse(dt$kp17_2090a==1, 1, 0)
  dt$pid_SPD <- ifelse(dt$kp17_2090a==4, 1, 0)
  dt$pid_FDP <- ifelse(dt$kp17_2090a==5, 1, 0)
  dt$pid_GRUENE <- ifelse(dt$kp17_2090a==6, 1, 0)
  dt$pid_LINKE <- ifelse(dt$kp17_2090a==7, 1, 0)
  
  d <- data.frame(
    "ptv" = dt$vote,
    "ptv_UNION" = dt$ptv_UNION,
    "ptv_SPD" = dt$ptv_SPD,
    "ptv_FDP" = dt$ptv_FDP,
    "ptv_GRUENE" = dt$ptv_GRUENE,
    "ptv_LINKE" = dt$ptv_LINKE,
    "rat.UNION" = (dt$rat_CDU + dt$rat_CSU)/2,
    "rat.SPD" = dt$rat_SPD,
    "rat.FDP" = dt$rat_FDP,
    "rat.GRUENE" = dt$rat_GRUENE,
    "rat.LINKE" = dt$rat_LINKE,
    "pid_UNION" = dt$pid_UNION,
    "pid_SPD" = dt$pid_SPD,
    "pid_FDP" = dt$pid_FDP,
    "pid_GRUENE" = dt$pid_GRUENE,
    "pid_LINKE" = dt$pid_LINKE,
    "lotterymean.UNION" = M_UNION,
    "lotteryvariance.UNION" = V_UNION,
    "lotterymean.SPD" = M_SPD,
    "lotteryvariance.SPD" = V_SPD,
    "lotterymean.FDP" = M_FDP,
    "lotteryvariance.FDP" = V_FDP,
    "lotterymean.GRUENE" = M_GRUENE,
    "lotteryvariance.GRUENE" = V_GRUENE,
    "sex" = dt$male,
    "age" = dt$age,
    "edu" = dt$edu
  )
  
  d$lotteryvariance.UNION <- scales::rescale(d$lotteryvariance.UNION, to = c(0, 1), from = c(0,0.25))
  d$lotteryvariance.SPD <- scales::rescale(d$lotteryvariance.SPD, to = c(0, 1), from = c(0,0.25))
  d$lotteryvariance.FDP <- scales::rescale(d$lotteryvariance.FDP, to = c(0, 1), from = c(0,0.25))
  d$lotteryvariance.GRUENE <- scales::rescale(d$lotteryvariance.GRUENE, to = c(0, 1), from = c(0,0.25))
  d$lotteryvariance.LINKE <- scales::rescale(d$lotteryvariance.LINKE, to = c(0, 1), from = c(0,0.25))
  
# Save model results
  summary(m1 <- lm(as.formula(paste("ptv_UNION ~  lotteryvariance.UNION + lotterymean.UNION + rat.UNION + sex + as.factor(edu) + age", 
                                    if (robustnesscheck_pid) "+ pid_UNION" else "")), d))
  summary(m2 <- lm(as.formula(paste("ptv_SPD ~ lotteryvariance.SPD + lotterymean.SPD + rat.SPD + sex + as.factor(edu) + age", 
                                    if (robustnesscheck_pid) "+ pid_SPD" else "")), d))
  summary(m3 <- lm(as.formula(paste("ptv_FDP ~  lotteryvariance.FDP + lotterymean.FDP + rat.FDP + sex + as.factor(edu) + age", 
                                    if (robustnesscheck_pid) "+ pid_FDP" else "")), d))
  summary(m4 <- lm(as.formula(paste("ptv_GRUENE ~ lotteryvariance.GRUENE + lotterymean.GRUENE + rat.GRUENE + sex + as.factor(edu) + age", 
                                    if (robustnesscheck_pid) "+ pid_GRUENE" else "")), d))
  
  GER_2021_GLES_Panel_w17_UNION_Variance_Estimate <- tidy(m1) %>% filter(term=="lotteryvariance.UNION") %>% select("estimate")
  GER_2021_GLES_Panel_w17_UNION_Variance_SE <- se_robust(m1)["lotteryvariance.UNION"] #tidy(m1) %>% filter(term=="lotteryvariance.UNION") %>% select("std.error")
  GER_2021_GLES_Panel_w17_UNION_Mean_Estimate <- tidy(m1) %>% filter(term=="lotterymean.UNION") %>% select("estimate")
  GER_2021_GLES_Panel_w17_UNION_Mean_SE <- se_robust(m1)["lotterymean.UNION"] #tidy(m1) %>% filter(term=="lotterymean.UNION") %>% select("std.error")
  
  GER_2021_GLES_Panel_w17_SPD_Variance_Estimate <- tidy(m2) %>% filter(term=="lotteryvariance.SPD") %>% select("estimate")
  GER_2021_GLES_Panel_w17_SPD_Variance_SE <- se_robust(m2)["lotteryvariance.SPD"] #tidy(m2) %>% filter(term=="lotteryvariance.SPD") %>% select("std.error")
  GER_2021_GLES_Panel_w17_SPD_Mean_Estimate <- tidy(m2) %>% filter(term=="lotterymean.SPD") %>% select("estimate")
  GER_2021_GLES_Panel_w17_SPD_Mean_SE <- se_robust(m2)["lotterymean.SPD"] #tidy(m2) %>% filter(term=="lotterymean.SPD") %>% select("std.error")
  
  GER_2021_GLES_Panel_w17_FDP_Variance_Estimate <- tidy(m3) %>% filter(term=="lotteryvariance.FDP") %>% select("estimate")
  GER_2021_GLES_Panel_w17_FDP_Variance_SE <- se_robust(m3)["lotteryvariance.FDP"] #tidy(m3) %>% filter(term=="lotteryvariance.FDP") %>% select("std.error")
  GER_2021_GLES_Panel_w17_FDP_Mean_Estimate <- tidy(m3) %>% filter(term=="lotterymean.FDP") %>% select("estimate")
  GER_2021_GLES_Panel_w17_FDP_Mean_SE <- se_robust(m3)["lotterymean.FDP"] #tidy(m3) %>% filter(term=="lotterymean.FDP") %>% select("std.error")
  
  GER_2021_GLES_Panel_w17_GR_Variance_Estimate <- tidy(m4) %>% filter(term=="lotteryvariance.GRUENE") %>% select("estimate")
  GER_2021_GLES_Panel_w17_GR_Variance_SE <- se_robust(m4)["lotteryvariance.GRUENE"] #tidy(m4) %>% filter(term=="lotteryvariance.GRUENE") %>% select("std.error")
  GER_2021_GLES_Panel_w17_GR_Mean_Estimate <- tidy(m4) %>% filter(term=="lotterymean.GRUENE") %>% select("estimate")
  GER_2021_GLES_Panel_w17_GR_Mean_SE <- se_robust(m4)["lotterymean.GRUENE"] #tidy(m4) %>% filter(term=="lotterymean.GRUENE") %>% select("std.error")
  
  
  names(m1$coefficients)[names(m1$coefficients) == "lotteryvariance.UNION"] <- "Government Lottery Variance"
  names(m1$coefficients)[names(m1$coefficients) == "lotterymean.UNION"] <- "Government Lottery Mean"
  names(m2$coefficients)[names(m2$coefficients) == "lotteryvariance.SPD"] <- "Government Lottery Variance"
  names(m2$coefficients)[names(m2$coefficients) == "lotterymean.SPD"] <- "Government Lottery Mean"
  names(m3$coefficients)[names(m3$coefficients) == "lotteryvariance.FDP"] <- "Government Lottery Variance"
  names(m3$coefficients)[names(m3$coefficients) == "lotterymean.FDP"] <- "Government Lottery Mean"
  names(m4$coefficients)[names(m4$coefficients) == "lotteryvariance.GRUENE"] <- "Government Lottery Variance"
  names(m4$coefficients)[names(m4$coefficients) == "lotterymean.GRUENE"] <- "Government Lottery Mean"
  
  
  if(!robustnesscheck_pid){
    stargazer(m1, m2, m3, m4, title="Linear regressions of vote choice on perceived government lottery variance and mean (GLES Panel 2016-2021, Wave 17, Pre-Release (2021)).", align=TRUE, 
              dep.var.labels=c("Union", "SPD", "FDP", "Greens"),
              omit.stat=c("LL","ser","f"),
              se = lapply(list(m1, m2, m3, m4), se_robust),
              p = lapply(list(m1, m2, m3, m4), p_robust),
              out = "TableSM13.tex")
  }
 
  
